* Monica Essig Aberg
* 17 April 2024

* CLEAN RAW DATA ---------------------------------------------------------------

	foreach company in "11111" "66666" "55555" "44444" "22222" "33333" {

		* Load data
		if "`company'"=="22222"{
			insheet using "$data/cb/raw/[Project Name]_result1_2_czone_22222_part1.csv", clear 
			tempfile part1
			save `part1'
			
			insheet using "$data/cb/raw/[Project Name]_result1_2_czone_22222_part2.csv", clear 
			append using `part1'		
		}
		else {
			insheet using "$data/cb/raw/[Project Name]_result1_2_czone_`company'.csv", clear 
		}
		
		* ID wage bins
		gen wage_bin=subinstr(wage_flag,"wage_","",.)
		replace wage_bin="30" if wage_bin=="gt30"
		replace wage_bin="7" if wage_bin=="lt8"
		drop if wage_bin == "null"
		destring wage_bin, replace
		
		* Create monthly date and year variables  
		gen str7 str_monthyear = substr(string(archive), 1, 4) + "-" + substr(string(archive), 5, 2) + "-01"
		gen mdate = monthly(str_monthyear, "YM")
		format mdate %tm
		gen month=month(dofm(mdate))
		gen year=yofd(dofm(mdate))
		la var year "Year"
		
		* Clean
		gen n_emp = freq
		keep mdate n_emp freq_previousjoin wage_bin year month
		
		* Collapse to national level
		collapse (sum) n_emp freq_previousjoin, by(mdate year month wage_bin)
		
		* Adjust for outliers --------------------------------------------------

		* Flag low-reporting months for pay rate variables
		preserve
			collapse (sum) n_emp, by(mdate)
			tsset mdate
			tssmooth ma tot_emp_ma = n_emp, window(1 0 1)
			gen outlier_rate = n_emp < tot_emp_ma*.75
			tempfile outliers_rate
			save `outliers_rate', replace
		restore
		
		* Fill in missing months
		xtset wage_bin mdate
		tsfill, full
		replace year = yofd(dofm(mdate))
		replace month = month(dofm(mdate))
		
		* Merge
		merge m:1 mdate using `outliers_rate', nogen
		drop *_ma
		
		* Manually flag additional outliers
		if "`company'" == "66666" {
			replace outlier_rate = 1 if inrange(mdate,tm(2014m2),tm(2014m10)) | mdate == tm(2023m3)
		}
		if "`company'" == "22222" {
			replace outlier_rate = 1 if mdate == tm(2016m11) | mdate == tm(2016m12) | mdate == tm(2017m9)
		}

		* Do adjustment
		sort wage_bin mdate
		foreach var of varlist n_emp {
			rename `var' `var'_unadj
			gen `var' = `var'_unadj
			replace `var' = (L.`var' + F.`var')/2 if outlier_rate == 1
			if "`company'" == "66666" {
				replace `var' = (`var'[_n+648-mdate]+`var'[_n+658-mdate])/2 if inrange(mdate, 649, 657)
				replace `var' = (`var'[_n+677-mdate]+`var'[_n+680-mdate])/2 if inrange(mdate, 678, 679)
			}
			if "`company'" == "22222" {
				replace `var' = (`var'[_n+681-mdate]+`var'[_n+684-mdate])/2 if inrange(mdate, 682, 683)
			}
		}

		* END adjust for outliers ----------------------------------------------
		
		* Save
		gen cmp_company_code = `company'
		tempfile clean_monthly_`company'
		save `clean_monthly_`company''

	}
	
	* Append
	clear
	foreach company in "11111" "66666" "55555" "44444" "22222" "33333" {
		append using `clean_monthly_`company''
	}	
	
	* Replace missing with zero
	foreach var of varlist n_emp *unadj {
		replace `var' = 0 if `var' == .
	}
	
	* Save
	save "$data/cb/clean_policy_firm_wage_bin.dta", replace
	
* MAKE STACKED -----------------------------------------------------------------
	
	* Store tempfile of archive to mdate crosswalk
	insheet using "$data/cb/documentation/archive_mdate_crosswalk.csv", clear
	tempfile archive_mdate_crosswalk
	save `archive_mdate_crosswalk'	

	* Load event info
	use "$data/cb/events.dta", clear
	
	* Drop overlapping events
	drop if min(months_since_last_policy, months_until_next_policy) <= 6
	
	levelsof eventid, local(exp)
	* Build panel for each event 
	foreach experiment in `exp' {
		
		* Keep relevant row
		use in `experiment' using "$data/cb/events.dta", clear
		
		* Identify date
		merge 1:1 archive using `archive_mdate_crosswalk'
		
		* Store relevant parameters
		foreach vr in cmp_company_code mw share_affected archive mdate months_since_last_policy months_until_next_policy  {
			local `vr'=`vr'[1]
		}
		local bmw=`mw'-1
		local amw=`mw'-1
		
		noi di in green "Experiment `experiment': " in yellow `"`cmp_company_code' (`mw') `mdate'"' 
		
		* Load wage-firm-month level data
		use "$data/cb/clean_policy_firm_wage_bin.dta", clear
		
		* Restrict to company
		keep if cmp==`cmp_company_code'
		
		* Fill in experiment parameters
		gen trt_exp= `experiment'
		gen mw=`mw'
		gen share_affected=`share_affected'
		gen months_since_last_policy=`months_since_last_policy'
		gen months_until_next_policy=`months_until_next_policy'

		* Create event time variable
		gen etime=mdate-`mdate'-1
		 
		* Restrict to months close to event
		local event_start=-12
		local event_end=11
		keep if inrange(etime, `event_start',`event_end')

		tempfile experiment`experiment'
		save `experiment`experiment''

	 }
	 
	 clear

	 foreach experiment in `exp'{
		 append using `experiment`experiment''
	 }

	save "$data/cb/stacked_policy_wage_bin_dataset.dta", replace	











